import jsonlines
import json
from tqdm import tqdm
# Accumulates the converted records; the list is written out as JSON Lines
# at the end of the script.
new_datas = []
#with open("ultrachat_700k.jsonl") as f:
from datasets import load_dataset

# Load the training split of the WildChat dataset.
datas = load_dataset("WildChat", split="train")

# Convert each record into a {"id", "conversations"} dict and collect it in
# `new_datas`. tqdm wraps the dataset itself (rather than the enumerate
# object) so it can read the length and display a total / ETA.
for li, line in enumerate(tqdm(datas)):
    # Turns are labelled by position: even-indexed turns are attributed to
    # the human, odd-indexed turns to the model.
    # NOTE(review): this assumes the turns strictly alternate starting with
    # the user; if the records carry an explicit role field it would be a
    # safer source of truth -- confirm against the dataset schema.
    convs = [
        {"from": "human" if ci % 2 == 0 else "gpt", "value": conv["content"]}
        for ci, conv in enumerate(line["conversation"])
    ]
    # The id is derived from the record index `li`. Using the per-turn index
    # here would give many records the same id, and would fail outright for
    # a record with an empty conversation.
    new_datas.append({"id": f"wildchat_{li}", "conversations": convs})

# Dump the converted records to disk, one JSON object per line, with a
# progress bar over the records being written.
with jsonlines.open("WildChat_fastchat.jsonl", mode="w") as writer:
    writer.write_all(tqdm(new_datas))
